import math
import fitz
import re
from pathlib import Path
from tqdm import tqdm
from collections import OrderedDict
from itertools import cycle
import json

# Bitwise OR of the eight PDF_PERM_* flags listed below. ``dencrypt_pdf``
# passes it as ``permissions`` when saving the unprotected copy.
ALL_PDF_PERMS = (
    fitz.PDF_PERM_PRINT
    | fitz.PDF_PERM_MODIFY
    | fitz.PDF_PERM_COPY
    | fitz.PDF_PERM_ANNOTATE
    | fitz.PDF_PERM_FORM
    | fitz.PDF_PERM_ACCESSIBILITY
    | fitz.PDF_PERM_ASSEMBLE
    | fitz.PDF_PERM_PRINT_HQ
)
# NOTE(review): these look like the bit values returned by
# ``Document.authenticate`` (0 meaning failure) -- confirm against the PyMuPDF
# documentation. They are not referenced in the code visible in this review.
NO_PASSWORD_REQUIRED = 1
USER_AUTHENTICATED = 2
OWNER_AUTHENTICATED = 4

def get_digits(num):
    """Return how many decimal digits are needed to print ``num``.

    Used to choose the zero-padding width of generated file names. Values of
    1 or less (including 0 and negatives) yield 1. Non-integer values are
    truncated towards zero before counting.

    The digits are counted on the decimal string rather than with
    ``floor(log10(num)) + 1``: ``log10`` can round up to the next integer
    for values just below a power of ten (e.g. 999999999999999), which
    reports one digit too many.
    """
    if num <= 1:
        return 1
    return len(str(int(num)))


# Applied with ``.match`` to a path suffix in ``combine``. ``.match`` anchors
# only at the start, so the patterns accept any suffix that *begins* with one
# of these extensions (case-insensitive).
IMG_MATCH = re.compile(r"\.(jpe?g|png|bmp)", re.I)
PDF_MATCH = re.compile(r"\.(pdf)", re.I)
# One line of the plain-text bookmark format read by ``_read_std_tocs``:
# group 1 = leading tabs, group 2 = title, group 3 = page digits after a tab.
STD_TOCS_MATCH = re.compile(r"(\t*)(.+?)\t(\d+)")

# ========extract========
def extract_pages(doc, output_dir=".", zoom=(1.0, 1.0), caption="pdf转png"):
    """Render every page of ``doc`` to a PNG file inside ``output_dir``.

    Files are named ``P<index>.png``. The 1-based page index is zero-padded
    to the width of the page count, so the files sort in page order.

    Args:
        doc: An open PyMuPDF document.
        output_dir: Destination directory. It is not created here.
        zoom: ``(x, y)`` scale factors handed to ``fitz.Matrix``. The
            default ``(1.0, 1.0)`` corresponds to 72 DPI.
        caption: Label shown on the progress bar.
    """
    # len(doc) is the page count; it replaces the deprecated camelCase
    # ``pageCount`` alias, which newer PyMuPDF releases may not provide.
    page_total = len(doc)
    digits = get_digits(page_total)
    # The matrix does not depend on the page, so build it once.
    matrix = fitz.Matrix(*zoom)
    with tqdm(
        enumerate(doc, 1), caption, total=page_total, ascii=True
    ) as indexed_pages:
        for index, page in indexed_pages:
            page.get_pixmap(matrix=matrix).save(
                f"{output_dir}/P{index:0{digits}d}.png"
            )


def extract_pics(doc, output_dir=".", caption="相册转图片"):
    """Write every distinct embedded image of ``doc`` into ``output_dir``.

    Images are identified by xref (the first item of each entry returned by
    ``doc.get_page_images``). An image used on several pages is written
    once, in order of first appearance. Files are named ``P<index>.<ext>``
    with a zero-padded 1-based index and the extension reported by
    ``doc.extract_image``.

    Args:
        doc: An open PyMuPDF document.
        output_dir: Destination directory. It is not created here.
        caption: Label shown on the progress bar.
    """
    # len(doc) is the page count; it replaces the deprecated camelCase
    # ``pageCount`` alias, which newer PyMuPDF releases may not provide.
    # OrderedDict.fromkeys drops repeated xrefs and keeps first-seen order.
    xrefs = OrderedDict.fromkeys(
        image[0]
        for page_no in range(len(doc))
        for image in doc.get_page_images(page_no)
    )
    digits = get_digits(len(xrefs))
    with tqdm(
        enumerate(xrefs, 1), caption, total=len(xrefs), ascii=True
    ) as indexed_xrefs:
        for index, xref in indexed_xrefs:
            img_dict = doc.extract_image(xref)
            Path(
                output_dir, f'P{index:0{digits}d}.{img_dict["ext"]}'
            ).write_bytes(img_dict["image"])


# ========combine========
def _add_pic(img_path, target_pdf):
    """Append the image at ``img_path`` to ``target_pdf`` as a single page.

    The image is converted to a one-page PDF and drawn onto a new page that
    has the image's own dimensions. This is best-effort: if any step fails,
    the failure is reported through ``tqdm.write`` and a blank placeholder
    page stands in, so the output still holds exactly one page per image.

    Args:
        img_path: Path of the image file to add.
        target_pdf: Open PyMuPDF document that receives the page.
    """
    pages_before = len(target_pdf)
    try:
        with fitz.open(img_path) as img_doc:
            rect = img_doc[0].rect
            pdf_bytes = img_doc.convert_to_pdf()
        with fitz.open("pdf", pdf_bytes) as pdf_doc:
            page = target_pdf.new_page(width=rect.width, height=rect.height)
            page.show_pdf_page(rect, pdf_doc, 0)
    except Exception as err:
        # Deliberately broad so one bad image cannot abort a merge, but no
        # longer a bare ``except``: Ctrl-C and SystemExit now propagate.
        tqdm.write(f"{img_path} 无法转换为PDF页面，已用空白页代替：{err}")
        # If the failure came after new_page(), the placeholder already
        # exists; adding another would leave two pages for one image.
        if len(target_pdf) == pages_before:
            target_pdf.new_page()


def _add_pdf(pdf_path, target_pdf, target_tocs):
    """Append the PDF at ``pdf_path`` to ``target_pdf`` and collect bookmarks.

    On success ``target_tocs`` is extended in place with a level-1 entry
    named after the file (its stem) pointing at the first inserted page.
    The file's own bookmarks follow, one level deeper, re-targeted to their
    new page numbers. This is best-effort: if the file cannot be opened or
    inserted, it is reported through ``tqdm.write`` and skipped, and no
    bookmarks are recorded for it.

    Args:
        pdf_path: Path of the PDF file to append.
        target_pdf: Open PyMuPDF document that receives the pages.
        target_tocs: List of ``[level, title, page]`` rows, mutated in place.
    """

    def _shifted_tocs(doc, page_offset):
        # TOC rows are [level, title, page] with 1-based page numbers.
        # ``page_offset`` is the number of pages already in the target, so
        # source page p lands on target page p + page_offset. The original
        # code added one more, pointing nested bookmarks one page too far.
        rows = [(1, Path(doc.name).stem, page_offset + 1)]
        for level, title, page, *_ in doc.get_toc():
            # NOTE(review): assumes PyMuPDF reports bookmarks without an
            # in-document target as a non-positive page number -- confirm.
            # Such entries are kept as-is rather than shifted onto a real
            # but unrelated page.
            new_page = page + page_offset if page > 0 else page
            rows.append((level + 1, title, new_page))
        return rows

    try:
        with fitz.open(pdf_path) as pdf_doc:
            # len(target_pdf) is the page count (replaces the deprecated
            # ``pageCount`` alias).
            tocs = _shifted_tocs(pdf_doc, len(target_pdf))
            target_pdf.insert_pdf(pdf_doc)
    except Exception as err:
        # Deliberately broad so one bad file cannot abort a merge, but no
        # longer silent, and Ctrl-C / SystemExit now propagate.
        tqdm.write(f"{pdf_path} 无法合并，已跳过：{err}")
    else:
        # Record bookmarks only once the pages they point at really exist.
        target_tocs.extend(tocs)


def combine(input_list, output_path, save_options=None, caption="合并各种文件"):
    """Merge image and PDF files into one PDF written to ``output_path``.

    Inputs are processed in the given order. Entries whose suffix matches
    ``IMG_MATCH`` become one page each. Entries whose suffix matches
    ``PDF_MATCH`` are inserted whole and contribute bookmarks. Anything
    else is ignored. If no page was produced, a message is printed and no
    file is written.

    Args:
        input_list: Iterable of path objects exposing ``.suffix`` (for
            example ``pathlib.Path``).
        output_path: Destination of the merged PDF.
        save_options: Optional keyword arguments forwarded to
            ``Document.save``.
        caption: Label shown on the progress bar and used as prefix of the
            "nothing found" message.
    """
    save_options = save_options or {}
    # Materialise the input so tqdm can show a total even for generators.
    input_list = tuple(input_list)
    with fitz.Document() as result:
        all_tocs = []
        with tqdm(input_list, caption, ascii=True) as bar:
            for media_path in bar:
                suffix = media_path.suffix
                if IMG_MATCH.match(suffix):
                    _add_pic(media_path, result)
                elif PDF_MATCH.match(suffix):
                    _add_pdf(media_path, result, all_tocs)
        # len(result) is the page count; it replaces the deprecated camelCase
        # ``pageCount`` alias, which newer PyMuPDF releases may not provide.
        if len(result) == 0:
            tqdm.write(caption + "未找到有效文件！")
        else:
            result.set_toc(all_tocs)
            result.save(output_path, **save_options)


def extract_texts(doc, option="text"):
    """Return the extracted text of every page of ``doc`` as a tuple.

    The tuple has one item per page, in page order. ``option`` is handed
    unchanged to ``Page.get_text``; accepted values are listed at
    https://pymupdf.readthedocs.io/en/latest/page.html#Page.get_text
    """
    # ``get_text`` replaces the deprecated camelCase alias ``getText``,
    # matching the snake_case PyMuPDF API used elsewhere in this module.
    return tuple(page.get_text(option) for page in doc)


def dencrypt_pdf(file_path: Path, password=""):
    """Save an unprotected copy of ``file_path`` next to the original.

    The document is authenticated with ``password``. On success a copy named
    ``<stem>_dencrypted.pdf`` is written without encryption and with all
    permissions granted. The outcome is reported through ``tqdm.write``.

    This cannot crack files that need a password to open; it only lifts
    restrictions such as the copy permission. Do not use it to brute-force
    passwords -- consider MuPDF's C functions for that.
    https://pymupdf.readthedocs.io/en/latest/document.html#Document.authenticate

    Returns:
        The value returned by ``Document.authenticate``; 0 means failure.
    """
    with fitz.open(file_path) as doc:
        auth_status = doc.authenticate(password)
        # Guard clause: nothing is written when authentication fails.
        if auth_status == 0:
            tqdm.write(file_path.stem + "解除保护失败！")
            return auth_status
        unlocked_path = file_path.with_name(file_path.stem + "_dencrypted.pdf")
        doc.save(
            unlocked_path,
            encryption=fitz.PDF_ENCRYPT_NONE,
            permissions=ALL_PDF_PERMS,
        )
        tqdm.write(file_path.stem + "解除保护成功！")
        return auth_status


def _read_std_tocs(text):
    """Parse the tab-indented plain-text bookmark format into TOC rows.

    ``text`` is split on newlines and every line is tested against
    ``STD_TOCS_MATCH``. A matching line becomes ``[level, title, page]``:
    ``level`` is the number of leading tabs plus one, ``title`` is the text
    before the separating tab, and ``page`` is the integer after that tab.
    Lines that do not match are skipped.
    """
    hits = filter(None, map(STD_TOCS_MATCH.match, text.split("\n")))
    return [
        [len(indent) + 1, title, int(page)]
        for indent, title, page in (hit.groups() for hit in hits)
    ]


def add_tocs(file_path: Path, tocs_path: Path, encoding="utf-8"):
    """Replace the bookmarks of the PDF at ``file_path`` from ``tocs_path``.

    Two bookmark file formats are recognised by extension (case-insensitive):

    * ``.txt`` -- one entry per line. The number of leading tabs gives the
      level (none for top-level entries), followed by the title, one tab,
      and the page number.
    * ``.json`` -- a JSON array of ``[level, title, page]`` rows.

    The PDF is updated in place with an incremental save. Any other
    extension is reported through ``tqdm.write`` and the PDF is left
    untouched. Previously that case fell through to ``set_toc([])``, which
    erased the existing bookmarks while still reporting success.

    Args:
        file_path: PDF to modify.
        tocs_path: Bookmark definition file.
        encoding: Text encoding of ``tocs_path``; undecodable bytes are
            replaced rather than raising.
    """
    suffix = tocs_path.suffix.lower()
    if suffix not in (".txt", ".json"):
        tqdm.write(tocs_path.name + "不是支持的书签文件（仅支持txt和json），未修改书签！")
        return
    text = tocs_path.read_text(encoding, "replace")
    if suffix == ".txt":
        all_tocs = _read_std_tocs(text)
    else:
        all_tocs = json.loads(text)
    with fitz.open(file_path) as doc:
        doc.set_toc(all_tocs)
        doc.saveIncr()
    tqdm.write(file_path.stem + "已添加书签！")
